library(brms)
library(xtable)
library(ggplot2)
  theme_set(theme_bw(base_size = 14))
library(cowplot)
  library(plyr)
source("BrmsTables.R")
load("AllPapers.RData")



#recodings needed to make the different analyses run (dont add/change any information in the data)
pap$discipline <- revalue(pap$area, c( "Biomedicine &\nhealth sciences"="HS",
                                       "Life sciences"="LS",
                                       "Physical sciences"="PS",
                                       "Soc. sciences\n& humanities"="SS"
                                    ))
pap$final.decision <- ifelse(pap$FinalAccept, "Reject","Accept")
pap$ord.decision <- revalue(as.character(pap$decRec), c("1" = "Accept",
                                          "2"="Minor revisions",
                                          "3"="Major revisions",
                                          "4"="Reject Major"))
pap$ediRej <- ifelse(pap$decRec == 4, 1, -1)

######################################################################
#Descriptives

pap <- subset(pap, is.na(IFRounded)==F)

aut <- apply(pap[, c("nMales", "nFemales", "nAuthors")], 2, sum, na.rm=T)
aut
aut[3] - sum(aut[1:2])
aut / aut[3]
(aut[3] - sum(aut[1:2])) / aut[3]

ref <- apply(pap[, c("malRev", "femRev", "undRev")], 2, sum, na.rm=T)
ref
ref / sum(ref)

tab <- table(pap$year)
mean(tab[3:9])

# Proportion of known genders

# N of cases
ncases <- function(x)length(unique(x))
ncases(pap$Manuscript_ID)
ncases(pap$Journal)
ag <- aggregate(Journal ~ discipline, subset(pap, is.na(IFRounded)==F), ncases)
ag
ag <- aggregate(Manuscript_ID ~ discipline, pap, ncases)
sum(ag[,2])

sum(pap$nRev) / ncases(pap$Manuscript_ID)

# Rejections
round(prop.table(xtabs(~ rejected + discipline, pap), 2), 3)
round(prop.table(xtabs(~ FinalAccept + discipline, pap), 2), 3)

# Women proportion
1 - mean(pap$autRatFem)
1 - mean(pap$revRatFem)

ag <- aggregate(autRatFem ~ discipline, pap, mean)
round(ag$autRatFem, 2)
ag <- aggregate(revRatFem ~ discipline, pap, mean)
round(ag$revRatFem, 2)

summary(pap$revRatFem)
ggplot(pap, aes(x=revRatFem)) + geom_histogram(bins=10) + facet_wrap(~ discipline, scales="free_y")

ag <- aggregate(cbind(autRatFem, revRatFem) ~ discipline, pap, mean)
xtable(ag, digits=c(0,0,3,3))

#IF per discipline
agM <- aggregate(IFRounded ~ discipline, pap, mean)
agSd <- aggregate(IFRounded ~ discipline, pap, sd)

tab <- as.data.frame(xtabs(~ final.decision + first.author.gender + area, pap))
leg <-  ggplot(tab, aes(y=Freq, x=final.decision, fill=first.author.gender)) + geom_col(position="fill",show.legend=T) + facet_wrap(~ area) +  scale_fill_viridis_d() + labs(x="Final Decision", y="Proportion", fill="Author\ngender")
plt1 <- ggplot(tab, aes(y=Freq, x=final.decision, fill=first.author.gender)) + geom_col(position="fill",show.legend=F) + facet_wrap(~ area) +  scale_fill_viridis_d() + labs(x="Final Decision", y="Proportion", fill="First author\ngender", title="First author")

tab2 <- as.data.frame(xtabs(~ final.decision + last.author.gender + area, pap))
plt2 <- ggplot(tab2, aes(y=Freq, x=final.decision, fill=last.author.gender)) + geom_col(position="fill",show.legend=F) + facet_wrap(~ area) +  scale_fill_viridis_d() + labs(x="Final Decision", y="Proportion", fill="Last author\ngender",title="Last author")

legend <- get_legend(leg)
plot_grid(plt1, plt2, legend, rel_widths=c(0.45,0.45,0.1),labels=c("A","B",""), nrow=1)


# First-last authors

xtabs(~first.author.gender + last.author.gender, pap)
xtabs(~nAuthors + first.author.gender + last.author.gender, subset(pap, nAuthors<5))

#######################################################################

# load models by discipline

  files <- dir(path="./ModelsByDiscipline/", pattern="RData")

  load(paste0("ModelsByDiscipline/", file))

#########################################################################

# Final decision

summary(sc1hs)

# tables should be organised by indicator:
disc <- c("Biom. \\& health sc.","Life sc.","Physical sc.","Social sc.")
vars <- c("(Intercept)", "Women proportion (authors)", "Women proportion (referees)", "Referee recommendation", "Agreement", "IF", "N. of authors", "N. of referees", "PR type: single-blind", "N. of revision rounds")
aut.rat.tab <- multi.tab(list(fin1hs, fin1ls, fin1ps, fin1ss), round.est=3, vars=vars)
names(aut.rat.tab)[3:6] <- disc

vars <- c("(Intercept)", "First author woman", "Last author woman", "Women proportion (referees)", "Referee recommendation", "Agreement", "IF", "N. of authors", "N. of referees", "PR type: single-blind", "N. of revision rounds")
first.tab <- multi.tab(list(fin5hs, fin5ls, fin5ps, fin5ss), round.est=3, vars=vars)
names(first.tab)[3:6] <- disc

vars <- c("(Intercept)", "Solo woman", "All men team", "All women team", "Cross collaboration", "Women proportion (referees)", "Referee recommendation", "Agreement", "IF", "N. of authors", "N. of referees", "PR type: single-blind", "N. of revision rounds")
strat.tab <- multi.tab(list(fin2hs, fin2ls, fin2ps, fin2ss), round.est=3, vars=vars)
names(strat.tab)[3:6] <- disc



# Print the tables

print(xtable(aut.rat.tab[-2], align=c("llcccc")), include.rownames=F, booktabs=T, floating=F, hline.after=c(-1,0, seq(3, nrow(aut.rat.tab)-3, 3), nrow(aut.rat.tab)))

print(xtable(first.tab[-2], align=c("llcccc")), include.rownames=F, booktabs=T, floating=F, hline.after=c(-1,0, seq(3, nrow(first.tab)-3, 3), nrow(first.tab)))

print(xtable(strat.tab[-2], align=c("llcccc")), include.rownames=F, booktabs=T, floating=F, hline.after=c(-1,0, seq(3, nrow(strat.tab)-3, 3), nrow(strat.tab)))

######################################################################

# Gender Matching

disc <- c("Biom. \\& health sc.","Life sc.","Physical sc.","Social sc.")
vars <- c("(Intercept)", "Women proportion (authors)", "IF", "N. of authors",  "PR type: single-blind")
match.tab <- multi.tab(list(match1hs, match1ls, match1ps, match1ss), round.est=3, vars=vars)
names(match.tab)[3:6] <- disc

print(xtable(match.tab[-2], align=c("llcccc")), include.rownames=F, booktabs=T, floating=F, hline.after=c(-1,0, seq(3, nrow(match.tab)-3, 3), nrow(match.tab)))

######################################################################

# Review scores


disc <- c("Biom. \\& health sc.","Life sc.","Physical sc.","Social sc.")


vars <- c("(Intercept)", "Women proportion (authors)", "Women proportion (referees)", "IF", "N. of authors", "N. of referees", "PR type: single-blind")
sc.tab <- multi.tab(list(sc1hs, sc1ls, sc1ps, sc1ss), round.est=3, vars=vars)
names(sc.tab)[3:6] <- disc

print(xtable(sc.tab[-2], align=c("llcccc")), include.rownames=F, booktabs=T, floating=F, hline.after=c(-1,0, seq(3, nrow(sc.tab)-3, 3), nrow(sc.tab)))


vars <- c("(Intercept)", "First author woman", "Last author woman", "Women proportion (referees)", "IF", "N. of authors", "N. of referees", "PR type: single-blind")
sc.tab <- multi.tab(list(sc5hs, sc5ls, sc5ps, sc5ss), round.est=3, vars=vars)
names(sc.tab)[3:6] <- disc

print(xtable(sc.tab[-2], align=c("llcccc")), include.rownames=F, booktabs=T, floating=F, hline.after=c(-1,0, seq(3, nrow(sc.tab)-3, 3), nrow(sc.tab)))
  


vars <- c("(Intercept)", "Solo woman", "All men team", "All women team", "Cross collaboration", "Women proportion (referees)", "IF", "N. of authors", "N. of referees", "PR type: single-blind")
sc.tab <- multi.tab(list(sc2hs, sc2ls, sc2ps, sc2ss), round.est=3, vars=vars)
names(sc.tab)[3:6] <- disc

print(xtable(sc.tab[-2], align=c("llcccc")), include.rownames=F, booktabs=T, floating=F, hline.after=c(-1,0, seq(3, nrow(sc.tab)-3, 3), nrow(sc.tab)))


#####################################################################################################
#Is there an interaction effect between the gender of authors and prType/gender of reviewers

disc <- c("Biom. \\& health sc.","Life sc.","Physical sc.","Social sc.")
vars <- c("(Intercept)", "Women proportion (authors)", "Women proportion (referees)", "PR type: single-blind", "Women (referees) X PR type", "Women (authors) X PR type")
interactionsWitRevScore <- multi.tab(list(sc1hsGenInt4, sc1lsGenInt4, sc1psGenInt4, sc1ssGenInt4), round.est=3, vars=vars)
names(interactionsWitRevScore)[3:6] <- disc
print(xtable(interactionsWitRevScore[-2], align=c("llcccc")), include.rownames=F, booktabs=T, floating=F, hline.after=c(-1,0, seq(3, nrow(interactionsWitRevScore)-3, 3), nrow(interactionsWitRevScore)))

#####################################################################################################
#Does the effect of the authors' gender interact with the review score

disc <- c("Biom. \\& health sc.","Life sc.","Physical sc.","Social sc.")
interactionsWitRevScore <- multi.tab(list(finIntRevHs, finIntRevLs, finIntRevPs, finIntRevSs), round.est=3)
names(interactionsWitRevScore)[3:6] <- disc
print(xtable(interactionsWitRevScore[-2], align=c("llcccc")), include.rownames=F, booktabs=T, floating=F, hline.after=c(-1,0, seq(3, nrow(interactionsWitRevScore)-3, 3), nrow(interactionsWitRevScore)))

#####################################################################################################

#####################################################################################################
#Does the number of revision rounds depend on the gender (for accepted papers)

disc <- c("Biom. \\& health sc.","Life sc.","Physical sc.","Social sc.")
vars <- c("(Intercept)", "Women proportion (authors)", "Women proportion (referees)", "Review score", 
          "Agreement","IF", "N. of authors", "N. of referees", "PR type: single-blind")
rv.low.tab <- multi.tab(list(nrHs,nrLs,nrPs,nrSs), round.est=3, vars=vars)
names(rv.low.tab)[3:6] <- disc
print(xtable(rv.low.tab[-2], align=c("llcccc")), include.rownames=F, booktabs=T, floating=F, hline.after=c(-1,0, seq(3, nrow(rv.low.tab)-3, 3), nrow(rv.low.tab)))

#creating the Bayesian network is done in the script SiBN.R

# BN Predictions
load("bnTest.RData")
summary(test)
table(test$pred)
table(test$autRatFem)

test$score.groups <- as.numeric(cut(test$rev.score, breaks=seq(-0.05, 1.05, 0.1), labels=seq(0,1,0.1)))
table(test$score.groups)

ag <- aggregate(pred ~ score.groups + autRatFem + discipline, test, mean)

ggplot(ag, aes(y=score.groups, x=pred, group=as.factor(autRatFem), color=as.factor(autRatFem))) + geom_line() + facet_wrap(~ discipline)
load("prediction.RData")

b$autFemRat <- factor(b$autFemRat, labels=c("Males", "Females"))


mlt <- reshape2::melt(b[,-3], c("reviewScore", "autFemRat"))
mlt$variable <- factor(mlt$variable, levels=c("notRejectedHS", "notRejectedLS", "notRejectedPS", "notRejectedSS") , labels=c("Biomedicine &\nhealth sciences", "Life sciences", "Physical sciences", "Soc. sciences\n& humanities"))


ggplot(mlt, aes(y=reviewScore, x= value, color=autFemRat)) + geom_line(size=0.8) + facet_wrap(~ variable) + scale_color_viridis_d() + labs(x="Review score", y="Probability of final acceptance", color="Author\ngender")



